POST /1/crawlers/{id}/test
curl --request POST \
  --url https://crawler.algolia.com/api/1/crawlers/{id}/test \
  --header 'Authorization: Basic <encoded-value>' \
  --header 'Content-Type: application/json' \
  --data '{
  "url": "https://www.algolia.com/blog",
  "config": {
    "actions": [
      {
        "autoGenerateObjectIDs": true,
        "cache": {
          "enabled": true
        },
        "discoveryPatterns": [
          "https://www.algolia.com/**"
        ],
        "fileTypesToMatch": [
          "html",
          "pdf"
        ],
        "hostnameAliases": {
          "dev.example.com": "example.com"
        },
        "indexName": "algolia_website",
        "name": "<string>",
        "pathAliases": {
          "example.com": {
            "/foo": "/bar"
          }
        },
        "pathsToMatch": [
          "https://www.algolia.com/**"
        ],
        "recordExtractor": {
          "__type": "function",
          "source": "<string>"
        },
        "schedule": "<string>",
        "selectorsToMatch": [
          ".products",
          "!.featured"
        ]
      }
    ],
    "apiKey": "<string>",
    "appId": "<string>",
    "exclusionPatterns": [
      "https://www.example.com/excluded",
      "!https://www.example.com/this-one-url",
      "https://www.example.com/exclude/**"
    ],
    "externalData": [
      "testCSV"
    ],
    "extraUrls": [
      "<string>"
    ],
    "ignoreCanonicalTo": true,
    "ignoreNoFollowTo": true,
    "ignoreNoIndex": true,
    "ignorePaginationAttributes": true,
    "ignoreQueryParams": [
      "ref",
      "utm_*"
    ],
    "ignoreRobotsTxtRules": true,
    "indexPrefix": "crawler_",
    "initialIndexSettings": {},
    "linkExtractor": {
      "__type": "function",
      "source": "({ $, url, defaultExtractor }) => {\n  if (/example.com\\/doc\\//.test(url.href)) {\n    // For all pages under `/doc`, only extract the first found URL.\n    return defaultExtractor().slice(0, 1)\n  }\n  // For all other pages, use the default.\n  return defaultExtractor()\n}\n"
    },
    "login": {
      "url": "https://example.com/secure/login-with-post",
      "requestOptions": {
        "method": "POST",
        "headers": {
          "Content-Type": "application/x-www-form-urlencoded"
        },
        "body": "id=my-id&password=my-password",
        "timeout": 5000
      }
    },
    "maxDepth": 5,
    "maxUrls": 250,
    "rateLimit": 4,
    "renderJavaScript": true,
    "requestOptions": {
      "proxy": "<string>",
      "timeout": 30000,
      "retries": 3,
      "headers": {
        "Accept-Language": "fr-FR",
        "Authorization": "Bearer Aerehdf==",
        "Cookie": "session=1234"
      }
    },
    "safetyChecks": {
      "beforeIndexPublishing": {
        "maxLostRecordsPercentage": 10,
        "maxFailedUrls": 123
      }
    },
    "saveBackup": true,
    "schedule": "every weekday at 12:00 pm",
    "sitemaps": [
      "https://example.com/sitemap.xyz"
    ],
    "startUrls": [
      "https://www.example.com"
    ]
  }
}'
{
  "startDate": "2024-04-02T15:34:29Z",
  "endDate": "2024-04-02T15:34:29Z",
  "logs": [
    [
      "Processing url 'https://www.algolia.com/blog'"
    ]
  ],
  "records": [
    {
      "indexName": "testIndex",
      "records": [
        {
          "objectID": "https://www.algolia.com/blog",
          "numberOfLinks": 2
        }
      ],
      "recordsPerExtractor": [
        {
          "index": 0,
          "type": "custom",
          "records": [
            {
              "objectID": "https://www.algolia.com/blog"
            }
          ]
        }
      ]
    }
  ],
  "links": [
    "https://blog.algolia.com/challenging-migration-heroku-google-kubernetes-engine/",
    "https://blog.algolia.com/tale-two-engines-algolia-unity/"
  ],
  "externalData": {
    "externalData1": {
      "data1": "val1",
      "data2": "val2"
    },
    "externalData2": {
      "data1": "val1",
      "data2": "val2"
    }
  },
  "error": {}
}

Authorizations

Authorization
string
header
required

Basic authentication header of the form `Basic <encoded-value>`, where `<encoded-value>` is the Base64-encoded string `username:password`.

Path Parameters

id
string
required

Crawler ID.

Example:

"e0f6db8a-24f5-4092-83a4-1b2c6cb6d809"

Body

application/json

Response

200
application/json

OK

The response is of type object.